{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/sherwin/anaconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
      "  return f(*args, **kwds)\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn import svm\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 获取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = pd.read_csv(\"./data/train.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>pixel0</th>\n",
       "      <th>pixel1</th>\n",
       "      <th>pixel2</th>\n",
       "      <th>pixel3</th>\n",
       "      <th>pixel4</th>\n",
       "      <th>pixel5</th>\n",
       "      <th>pixel6</th>\n",
       "      <th>pixel7</th>\n",
       "      <th>pixel8</th>\n",
       "      <th>...</th>\n",
       "      <th>pixel774</th>\n",
       "      <th>pixel775</th>\n",
       "      <th>pixel776</th>\n",
       "      <th>pixel777</th>\n",
       "      <th>pixel778</th>\n",
       "      <th>pixel779</th>\n",
       "      <th>pixel780</th>\n",
       "      <th>pixel781</th>\n",
       "      <th>pixel782</th>\n",
       "      <th>pixel783</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 785 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  \\\n",
       "0      1       0       0       0       0       0       0       0       0   \n",
       "1      0       0       0       0       0       0       0       0       0   \n",
       "2      1       0       0       0       0       0       0       0       0   \n",
       "3      4       0       0       0       0       0       0       0       0   \n",
       "4      0       0       0       0       0       0       0       0       0   \n",
       "\n",
       "   pixel8    ...     pixel774  pixel775  pixel776  pixel777  pixel778  \\\n",
       "0       0    ...            0         0         0         0         0   \n",
       "1       0    ...            0         0         0         0         0   \n",
       "2       0    ...            0         0         0         0         0   \n",
       "3       0    ...            0         0         0         0         0   \n",
       "4       0    ...            0         0         0         0         0   \n",
       "\n",
       "   pixel779  pixel780  pixel781  pixel782  pixel783  \n",
       "0         0         0         0         0         0  \n",
       "1         0         0         0         0         0  \n",
       "2         0         0         0         0         0  \n",
       "3         0         0         0         0         0  \n",
       "4         0         0         0         0         0  \n",
       "\n",
       "[5 rows x 785 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(42000, 785)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 确定特征值/目标值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Feature matrix: every column after the leading `label` column.\n",
    "# `.ix` was removed in pandas 1.0; the column labels are strings, so the\n",
    "# integer slice was positional and `.iloc` is the exact replacement.\n",
    "train_image = train.iloc[:, 1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pixel0</th>\n",
       "      <th>pixel1</th>\n",
       "      <th>pixel2</th>\n",
       "      <th>pixel3</th>\n",
       "      <th>pixel4</th>\n",
       "      <th>pixel5</th>\n",
       "      <th>pixel6</th>\n",
       "      <th>pixel7</th>\n",
       "      <th>pixel8</th>\n",
       "      <th>pixel9</th>\n",
       "      <th>...</th>\n",
       "      <th>pixel774</th>\n",
       "      <th>pixel775</th>\n",
       "      <th>pixel776</th>\n",
       "      <th>pixel777</th>\n",
       "      <th>pixel778</th>\n",
       "      <th>pixel779</th>\n",
       "      <th>pixel780</th>\n",
       "      <th>pixel781</th>\n",
       "      <th>pixel782</th>\n",
       "      <th>pixel783</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 784 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  \\\n",
       "0       0       0       0       0       0       0       0       0       0   \n",
       "1       0       0       0       0       0       0       0       0       0   \n",
       "2       0       0       0       0       0       0       0       0       0   \n",
       "3       0       0       0       0       0       0       0       0       0   \n",
       "4       0       0       0       0       0       0       0       0       0   \n",
       "\n",
       "   pixel9    ...     pixel774  pixel775  pixel776  pixel777  pixel778  \\\n",
       "0       0    ...            0         0         0         0         0   \n",
       "1       0    ...            0         0         0         0         0   \n",
       "2       0    ...            0         0         0         0         0   \n",
       "3       0    ...            0         0         0         0         0   \n",
       "4       0    ...            0         0         0         0         0   \n",
       "\n",
       "   pixel779  pixel780  pixel781  pixel782  pixel783  \n",
       "0         0         0         0         0         0  \n",
       "1         0         0         0         0         0  \n",
       "2         0         0         0         0         0  \n",
       "3         0         0         0         0         0  \n",
       "4         0         0         0         0         0  \n",
       "\n",
       "[5 rows x 784 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_image.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Target vector: the first column (`label`).\n",
    "# `.ix` was removed in pandas 1.0; `.iloc` selects the same column by position.\n",
    "train_label = train.iloc[:, 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1\n",
       "1    0\n",
       "2    1\n",
       "3    4\n",
       "4    0\n",
       "Name: label, dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_label.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 查看具体图像"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reshape the first row of 784 pixel values into a 28x28 image.\n",
    "# `.ix` was removed in pandas 1.0; with the default integer row index,\n",
    "# `.iloc[0]` picks the same row.\n",
    "num = train_image.iloc[0].values.reshape(28, 28)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPwAAAD6CAYAAACF8ip6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAABcdJREFUeJzt3cuL1XUcxvG5iYKXdErHUsswCdx0IbpYFAgGBdFGA5OgCINEUiMS2rXMC5WFuSsKwiiKCqQIihbpJhVKKlwoLSwSp4WVpuPM6Q/I3+fU6Jlx5nm9to+/c87mzXfx9czpbrVaXUCGnvH+AMDYETwEETwEETwEETwEETwEETwEETwEETwE6ev0G6zsWe2/8kGHfTHyfvd/+XdOeAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAgieAjS8Z+LhrE055v+xm3P9V+Wz9700vpyn//qvlF9psuJEx6CCB6CCB6CCB6CCB6CCB6CCB6CuIdnQhnYP6vcdy3a27gNtaaUz3a3RvWRJhQnPAQRPAQRPAQRPAQRPAQRPAQRPARxD89l5ejWu8p9z8Id5T61e2rjdufBNeWz17x1uNyHy3VicMJDEMFDEMFDEMFDEMFDEMFDENdyjKnfn6iv3fav2V7uM3qmlfu2wWWN28DjJ8tnh0+dKvfJwAkPQQQPQQQPQQQPQQQPQQQPQQQPQdzDc8n13nhD4/bw5q/KZ69oc8/+3bn6S6ofb1/RuM0e3F8+m8AJD0EED0EED0EED0EED0EED0EED0Hcw/O/Dd1/W7mv2PF14/Zs/08X9d7rtm4s97lvu2uvOOEhiOAhiOAhiOAhiOAhiOAhiOAhiHt4/uW3Z5aX+4Etr5f7SFercTsydK589skfHiv3qz86Wu7nyxUnPAQRPAQRPAQRPAQRPAQRPAQRPARxDx+ob/G15b72qc879t6rv11X7otWHS539+wXxwkPQQQPQQQPQQQPQQQPQQQPQVzLTUK9A/PK/d5Pfyz3TXOOtHmH7nI9dv7vxm363pltXptOcsJDEMFDEMFDEMFDEMFDEMFDEMFDEPfwk9GsGeV8sT/Z3M6mWx9q3PoH/ZzzeHLCQxDBQxDBQxDBQxDBQxDBQxDBQxD38BNU38IFjdvtH9T37D1tvs/ezuZf7yj31pnm78MzvpzwEETwEETwEETwEETwEETwEETwEMQ9/AR1Yvf0xu2Fq74vnx1p89obf7m73I/dV58TI6dPt3kHxosTHoIIHoIIHoIIHoIIHoIIHoIIHoK4h79MVd937+rq6lq5YPR/W/7PkbPlfmDnLeU++7S/LT9ROeEhiOAhiOAhiOAhiOAhiOAhiGu5cdJ33aJyn/nuX+X+4rxDjdvJ4TPlsw9sf77cB97ZV+5MXE54CCJ4CCJ4CCJ4CCJ4CCJ4CCJ4COIefpz8vKa+hz+0+LVRv/aW4w+W+8BO9+ypnPAQRPAQRPAQRPAQRPAQRPAQRPAQxD18h5xYv7zcP3x6W5tXmFauG47f07gNru1v89qn2uxMVk54CCJ4CCJ4CCJ4CCJ4CCJ4CCJ4COIefpR6584t9+c2vlfu1/fV9+ztHHzj5sat/6ifc+bCnPAQRPAQRPAQRPAQRPAQRPAQxLXcKB1/dGm5PzLjs46+/7lZ3R19fSYnJzwEETwEETwEETwEETwEETwEETwEcQ8/Sj1D9T7UGi73Kd295X62Vb/BH0uaX39++STJnPAQRPAQRPAQRPAQRPAQRPAQRPAQxD38KM3bta/c39ywpNyn95wt95d3ryr3pa/U7w8X4oSHIIKHIIKHIIKHIIKHIIKHIIKHIO7hO+STZVde1PPzu9yzc+k54SGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI
4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CFId6vVGu/PAIwRJzwEETwEETwEETwEETwEETwEETwEETwEETwEETwEETwEETwEETwEETwEETwEETwEETwEETwEETwEETwEETwE+QdtyZqynP7eDgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.imshow(num)\n",
    "plt.axis(\"off\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def to_plot(n):\n",
    "    \"\"\"Display the n-th image of `train_image` as a 28x28 picture.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    n : int\n",
    "        Zero-based row position of the image to draw.\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    Reads the notebook-level `train_image`. Going through `np.asarray` lets\n",
    "    this work both while `train_image` is still a DataFrame and after the\n",
    "    normalisation cell has rebound it to a NumPy array.\n",
    "    \"\"\"\n",
    "    # `.ix` was removed in pandas 1.0; select the row by position instead.\n",
    "    num = np.asarray(train_image)[n].reshape(28, 28)\n",
    "\n",
    "    plt.imshow(num)\n",
    "    plt.axis(\"off\")\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/sherwin/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:2: DeprecationWarning: \n",
      ".ix is deprecated. Please use\n",
      ".loc for label based indexing or\n",
      ".iloc for positional indexing\n",
      "\n",
      "See the documentation here:\n",
      "http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated\n",
      "  \n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPwAAAD6CAYAAACF8ip6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAABttJREFUeJzt3V+o33Udx/Hv73S2M9c2C3OszsYOztxFk1xLw6B/FzOIQAqcoskaBJoVDTGWEkQ3RRxButAso7pIFtugIAfFvKgwC91QM0xYw6mZZSKrsdM5Ozvn24XX3/cZ5/g7vx97PR63r33Pvmw897n47PxOr23bBsgwMugXAJaP4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CHIaL9/g50jN/ivfNBnR+YP9s7n1znhIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIYjgIcjooF+At94be64t9/9eXj9/8Jb7yn1qfkW5797/5c7tnc+15bMXP/yncmdpnPAQRPAQRPAQRPAQRPAQRPAQRPAQxD38kOptf1+5v/atc53b4asmy2fXjtR/7afnu7920zRNM1Lvz33+/s7tlbmp8tkb27vK/eIDR8u9PbfAu4dzwkMQwUMQwUMQwUMQwUMQwUMQ13IDMrpxvNw3//BEuf96vPvbSH8/vaZ89tu7Plfu7dG/lPtCTjy8vXP72yd+Uj77x3sfLPcrx+8o9/dMPl7u6ZzwEETwEETwEETwEETwEETwEETwEMQ9/IC0695e7g+MH1701973jdvLfd3R/n4U9NZ7Xu/crr5/V/nskx84UO5T4/OLeife5ISHIIKHIIKHIIKHIIKHIIKHIIKHIO7hB6R35n/lfmzmbLnvGFvZuU2/o/53fF25Lt25F1/u3KZ/9+Hy2dfef6bcLzs0vah34k1OeAgieAgieAgieAgieAgieAgieAjiHn5Aqrvqpmma7/z9U+V+aMujndtNXzxSPvvTS3eW+8Tk0+Xem9hY7sd3X9K5PXHzveWzp9u23N92Zrbc66dxwkMQwUMQwUMQwUMQwUMQwUMQwUMQ9/BD6pmX67vuZkv3tO+S4+Wj+26r969d3/3z3ZumaSY31J8dX1tdrtc+dmu5b37q2SX83jjhIYjgIYjgIYjgIYjgIYjgIYhruSG1Zc9fy337gZs6t/u21ddmH7+o/pHLkxueKvcHT42X+7HTE53bQ5v+UD579tX6x2izNE54CCJ4CCJ4CCJ4CCJ4CCJ4CCJ4COIefki1MzPlvv765zu3yYlPl89+c9uGcu8t8FHRY4efLPcX9l/ePS5wD09/OeEhiOAhiOAhiOAhiOAhiOAhiOAhiHv4C9C5ky+V+6oF9qWa/c9YX78+i+eEhyCChyCChyCChyCChyCChyCChyDu4XnL3f2xRwb9CnRwwkMQwUMQwUMQwUMQwUMQwUMQwUMQ9/AMlYv+5QzqJ3+6EETwEETwEETwEETwEETwEMS13JAaWbWq3Oenp5fpTZbX5l/8u9znluk9LlROeAgieAgieAgieAgieAgieAgieAjiHn5ARtauLffnv7e13Lfe9ufOrZ09u6h3Wg5Pz8yUe2/qwvz/BcPCCQ9BBA9BBA9BBA9BBA9BBA9BBA9B3MMPyEtfubLcV77Slns7N7jvDO9dXb/7Nat+1LntPX5j+ezYiycX80qcJyc8BBE8BBE8BBE8BBE8BBE8BBE8BHEPPyDf/8ID5f71u2+vv8B8/+7heytWlvud+39e7leNjXVupw+9u3x2rDlZ7iyNEx6CCB6CCB6CCB6CCB6CCB6CuJbrl2vqbyG9bPSxcj91Rf1vcf0h17XRTRvL/fgdm8r9utVPlPtH
n/1M57b+Z8+Uz86XK0vlhIcggocggocggocggocggocggocg7uH75MRX6z/ajaNryn39R/5R7mc/+cHO7dR7629v/e7eh8r9utWz5b731e7fu2maZs2Xure5qanyWfrLCQ9BBA9BBA9BBA9BBA9BBA9BBA9B3MP3yab1byzp+d9u+2W5z/y4+658rLeifPb1uTPlvuPYnnK/9LMny72dfaHcGRwnPAQRPAQRPAQRPAQRPAQRPAQRPARxD98nsz/YUO47du8q92M7DpT7Pf/8UOf2q990b03TNBOP1N+T/q7H68+Ob8uVYeaEhyCChyCChyCChyCChyCChyCChyC9tu3vrerOkRtc20KfHZk/2DufX+eEhyCChyCChyCChyCChyCChyCChyCChyCChyCChyCChyCChyCChyCChyCChyCChyCChyCChyCChyCChyCChyCChyB9/5hqYHg44SGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CGI4CHI/wHdTvDVZ5w1iAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "to_plot(n=40)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据基本处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pixel0</th>\n",
       "      <th>pixel1</th>\n",
       "      <th>pixel2</th>\n",
       "      <th>pixel3</th>\n",
       "      <th>pixel4</th>\n",
       "      <th>pixel5</th>\n",
       "      <th>pixel6</th>\n",
       "      <th>pixel7</th>\n",
       "      <th>pixel8</th>\n",
       "      <th>pixel9</th>\n",
       "      <th>...</th>\n",
       "      <th>pixel774</th>\n",
       "      <th>pixel775</th>\n",
       "      <th>pixel776</th>\n",
       "      <th>pixel777</th>\n",
       "      <th>pixel778</th>\n",
       "      <th>pixel779</th>\n",
       "      <th>pixel780</th>\n",
       "      <th>pixel781</th>\n",
       "      <th>pixel782</th>\n",
       "      <th>pixel783</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 784 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  \\\n",
       "0       0       0       0       0       0       0       0       0       0   \n",
       "1       0       0       0       0       0       0       0       0       0   \n",
       "2       0       0       0       0       0       0       0       0       0   \n",
       "3       0       0       0       0       0       0       0       0       0   \n",
       "4       0       0       0       0       0       0       0       0       0   \n",
       "\n",
       "   pixel9    ...     pixel774  pixel775  pixel776  pixel777  pixel778  \\\n",
       "0       0    ...            0         0         0         0         0   \n",
       "1       0    ...            0         0         0         0         0   \n",
       "2       0    ...            0         0         0         0         0   \n",
       "3       0    ...            0         0         0         0         0   \n",
       "4       0    ...            0         0         0         0         0   \n",
       "\n",
       "   pixel779  pixel780  pixel781  pixel782  pixel783  \n",
       "0         0         0         0         0         0  \n",
       "1         0         0         0         0         0  \n",
       "2         0         0         0         0         0  \n",
       "3         0         0         0         0         0  \n",
       "4         0         0         0         0         0  \n",
       "\n",
       "[5 rows x 784 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_image.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据归一化处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Normalise the feature values by dividing the raw pixel values by 255.\n",
    "# Derived from `train` rather than from `train_image` itself so that\n",
    "# re-running this cell always yields the same array (the previous\n",
    "# self-referential form failed on a second run because an ndarray has no `.values`).\n",
    "train_image = train.iloc[:, 1:].values / 255"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Labels as a plain NumPy array. Derived from `train` so the cell can be\n",
    "# re-run safely (an ndarray has no `.values`, so the self-referential form\n",
    "# failed on a second run).\n",
    "train_label = train.iloc[:, 0].values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据集分割"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep 80% of the rows for training and hold out the rest for validation;\n",
    "# the fixed seed makes the split reproducible.\n",
    "x_train, x_val, y_train, y_val = train_test_split(\n",
    "    train_image,\n",
    "    train_label,\n",
    "    train_size=0.8,\n",
    "    random_state=0,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(33600, 784) (8400, 784)\n"
     ]
    }
   ],
   "source": [
    "print(x_train.shape, x_val.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 特征降维和模型训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "from sklearn.decomposition import PCA\n",
    "\n",
    "# Run PCA with several settings to settle on the best final model\n",
    "\n",
    "def n_components_analysis(n, x_train, y_train, x_val, y_val):\n",
    "    \"\"\"Project the data with PCA, train an SVC on it and return the validation score.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    n\n",
    "        Forwarded unchanged as ``PCA(n_components=n)``.\n",
    "    x_train, y_train\n",
    "        Features and targets used to fit both the PCA and the SVC.\n",
    "    x_val, y_val\n",
    "        Features and targets the fitted SVC is scored on.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The result of ``SVC.score`` on the PCA-transformed validation data.\n",
    "    Progress, the score and the elapsed whole seconds are also printed.\n",
    "    \"\"\"\n",
    "    # Wall-clock start: the timing spans PCA fitting through scoring\n",
    "    t0 = time.time()\n",
    "\n",
    "    # Fit the PCA on the training features only\n",
    "    print(\"特征降维,传递的参数为:{}\".format(n))\n",
    "    reducer = PCA(n_components=n).fit(x_train)\n",
    "\n",
    "    # Apply the same projection to the training and validation features\n",
    "    train_reduced = reducer.transform(x_train)\n",
    "    val_reduced = reducer.transform(x_val)\n",
    "\n",
    "    # Train an SVC with default hyper-parameters on the reduced training data\n",
    "    print(\"开始使用svc进行训练\")\n",
    "    classifier = svm.SVC().fit(train_reduced, y_train)\n",
    "\n",
    "    # Validation score of the fitted classifier\n",
    "    accuracy = classifier.score(val_reduced, y_val)\n",
    "\n",
    "    # Report the score with the elapsed time truncated to whole seconds\n",
    "    elapsed = int(time.time() - t0)\n",
    "    print(\"准确率是:{}, 消耗时间是:{}s\".format(accuracy, elapsed))\n",
    "\n",
    "    return accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "特征降维,传递的参数为:0.7\n",
      "开始使用svc进行训练\n",
      "准确率是:0.9761904761904762, 消耗时间是:12s\n",
      "特征降维,传递的参数为:0.7374999999999999\n",
      "开始使用svc进行训练\n",
      "准确率是:0.9779761904761904, 消耗时间是:12s\n",
      "特征降维,传递的参数为:0.7749999999999999\n",
      "开始使用svc进行训练\n",
      "准确率是:0.9783333333333334, 消耗时间是:16s\n",
      "特征降维,传递的参数为:0.8125\n",
      "开始使用svc进行训练\n",
      "准确率是:0.9798809523809524, 消耗时间是:18s\n",
      "特征降维,传递的参数为:0.85\n",
      "开始使用svc进行训练\n",
      "准确率是:0.9803571428571428, 消耗时间是:22s\n"
     ]
    }
   ],
   "source": [
    "# 传递多个n_components,寻找合理的n_components:\n",
    "\n",
    "n_s = np.linspace(0.70, 0.85, num=5)\n",
    "accuracy = []\n",
    "\n",
    "for n in n_s:\n",
    "    tmp = n_components_analysis(n, x_train, y_train, x_val, y_val)\n",
    "    accuracy.append(tmp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD6CAYAAACoCZCsAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAHZpJREFUeJzt3Xu0lVW5x/HvI4qA0JZbauaFDsatwmqjEoSIgqKg5YUKUUqS9OAlbxADbdjBhmmooYdM8nBSQStFUzYiIlfhiLpRwdAMSTN1yCUSROO6n/PHXLhh7wV77cva813r/X3GYPTyrrm2v0Uwn7nmO9/5mrsjIiLps1/sACIiEocKgIhISqkAiIiklAqAiEhKqQCIiKSUCoCISEqpAIiIpJQKgIhISqkAiIik1P6xA+xLu3bt/Oijj44dQ0SkoCxbtmy9u7evqV2iC8DRRx9NeXl57BgiIgXFzP6eSztNAYmIpJQKgIhISqkAiIiklAqAiEhKqQCIiKSUCoCISEqpAIiIpJQKgIhIUrjD66/DL38J8+bl/T+X6BvBRESK3tatsGgRlJWFX3/7Wzg/Zgz065fX/7QKgIhIY1uzBp58MnT4Tz8NmzdDs2Zw8slw3XVwxhlwxBF5j6ECICKSb+7wyiuVo/wXXgjnDz8czj8fBg0Ko/0WLRo1lgqAiEg+fPIJzJ0bOvyZM+G998AMjjsOxo8PnX737uFcJCoAIiIN5Z13QmdfVhYu4m7ZAi1bwqmnhg5/4EA45JDYKT+lAiAiUlc7d4bpnF1TOytWhPNf+AL86Eeh0+/TB5o2jZtzL1QARERqY+PGcOG2rCxcyF2/Hpo0gd69w/LNQYOgU6eoUzu5UgEQEanJqlWVo/xFi2DHDmjTJkzpDBoUpnhat46dstZUAEREqtq+HRYvruz0//rXcL5bN7jmmtDpn3AC7F/YXWhhpxcRaSjr18OsWaHDnz07TPU0bQonnQSXXx7W5nfoEDtlg1IBEJF0coeVK0OHP2MGLF0KFRVw6KFw7rlhlH/KKWEVT5FSARCR9NiyBebPr1yb//fMo3O//nW44QYYPBi++lXYLx3bpKkAiEhxe//9yrX5zzwTbtBq0QIGDAid/umnw2GHxU4ZhQqAiBSXigp46aUwrVNWFo4BjjoKLrooTO2ceGLYeyflaiwAZtYMeAQ4AlgBXOjuXqVNa+Ax4ADgKXcfb2YHAQ8C7YAl7j7azNpl2h0MzHT3nzTopxGRdNq8OYzud03tfPBBmMbp2RN+8YvQ6XftWhBr8xtTLt8AhgHvuvsgMysD+gNPV2kzFFjp7qPM7Ckz65Bpt9TdbzazmWbWBTgfmAncCrxsZlPc/a8N93FEJDXeequyw58/H7Ztg5KSyrX5p50GbdvGTplouRSAfsD0zPE84CSqFwADWpmZZY6PBT4EjjKzJkBzYFvmZ13u7hVmtjDzs1QARKRmO3bAc89Vrs1/7bVwvnNnuOKK0Ol/4xtwwAFxcxaQXApAW2Bj5ngT0ClLm6nAAEKh2Ero8B8GxpAZ9bv7ajOr+rPaVP1BZjYSGAlw5JFH5vxBRKQI/etfYU1+WVlYo79hQ7j56sQTYeTIsDa/Y8fYKQtWLgVgPVCSOS7J/D6bEe6+zsweBtYCY4G73f1eM3vIzL6R5Wf9veoPcffJwGSA0tJSr/q6iBQxd/jLXyqndhYvDhuutW8PZ54ZRvn9+8NnPhM7aVHIpQDMpXJ03w+4I0ubPsBwMxtKmP5ZCpwKbMm8vhVouetnmdnLwInAxHqlF5HCt20bLFxY/ZGIxx4LY8eGTr9Hj9SszW9MuRSAacDZZrYCWA6sNrMJ7n7tbm1mAT8CngXGu/tmM5sETDOzUcA7hM7/JcIqoPOBGe7+ZgN+FhEpFGvW7Lntwq5HIp5yCoweHdbmN8IjEdPOqqzoTJTS0lIvLy+PHUNE6ssdli/f85GI7vD5
z4cR/qBBYc+dRn4kYrEys2XuXlpTO90IJiL59cYbYYuFVavCOvzjj698JOJXvqK1+RGpAIhI/vz5z2FaB2DKlLBq57OfjZtJPqUCICL58dJLYb+dZs3Cw9E7ZVtBLjHpsrqINLylS6FfP2jVKjxBS51/IqkAiEjDWrQorNVv3z4cf+ELsRPJXqgAiEjDeeaZsAfPEUeEzl9LORNNBUBEGkZZWVjZ88UvwoIFqd1jv5CoAIhI/T36KJx9dljWOW+eVvoUCBUAEamfBx+EIUPCdg1z5kCbans8SkKpAIhI3U2ZAsOGQZ8+YUuHkpKa3yOJoQIgInXz61/DiBFw6qlh586WLWMnklpSARCR2rv9dhg1Cs46C/70J2jePHYiqQMVABGpnZtugmuuCfP+Dz8MBx4YO5HUkQqAiOTGHa6/Hm64AS68EKZN0+MXC5z2AhKRmrnDtdeGqZ+RI+Huu/WAliKgAiAi+1ZRAZddFjr9K66AX/1KWzgXCZVwEdm7nTvhhz8Mnf+YMer8i4y+AYhIdtu3w/Dh8NBD8LOfhbl/df5FRQVARKrbtg2++1147DG45ZbwnF4pOioAIrKnLVvg3HPDzV0TJ4Z5fylKKgAiUunjj+Fb3wpP8LrnnrDiR4qWCoCIBB99FJ7Zu2QJ3HcfXHBB7ESSZyoAIgIffhge5LJsWbjoO2RI7ETSCFQARNJu/frw8PaVK+GRR8L+PpIKKgAiafbBB+H5vW++CU88EXb2lNRQARBJq3ffhZNPhvfegyefhJNOip1IGpkKgEgavf029OsXpn9mz4ZevWInkghUAETSZtWqMPLfvDks9+zRI3YiiUQFQCRNXnstdP47d8L8+dC9e+xEEpE2gxNJi+XL4cQTw34+Cxao8xcVAJFUePHFcJG3eXNYuBC6do2dSBJABUCk2C1ZEqZ9WreGRYvgmGNiJ5KEUAEQKWbz54e1/YcdFjr/o4+OnUgSRAVApFg99RScfnro9BcuhMMPj51IEkYFQKQYPf44nHkmdOkSLvgeemjsRJJAKgAixeaPfwz7+X/tazBvHrRrFzuRJJQKgEgxuf9++N73oGdPmDMHDj44diJJMBUAkWIxeXJ4hm+/fjBrFrRqFTuRJJwKgEgxuPNO+NGPwgNdZsyAgw6KnUgKwD4LgJk1M7MyM1tuZg+YmWVp09rMFpjZEjO7IXOur5ktzvz6h5kNz9ZORBrALbfAlVfC2WfDo49Cs2axE0mBqOkbwDDgXXfvDrQG+mdpMxRY6e69gF5m1sHdF7h7b3fvDawAXs7WruE+hkgKucONN8JPfhLm/f/wB2jaNHYqKSA1FYB+wJzM8Twg24bhBrTKfDsw4NhPXzBrAXR09xX7aiciteQeOv6f/Qx+8AN44AHYX3s7Su3UVADaAhszx5uANlnaTAUOBqYDW4Hmu73WH5ibQzsRyVVFRZjyufVWuPRSuPdeaNIkdiopQDUVgPVASea4JPP7bEa4+9mEjn3tbucHA2U5tPuUmY00s3IzK1+3bl1N+UXSpaICLrkE7roLrr4aJk2C/bSWQ+qmpr85c4EBmeN+wPwsbfoAvzGzAwnTOksBMlM9fQlTR3ttV5W7T3b3Uncvbd++fS0+ikiR27EDvv99+O1vYdw4mDAhbO0sUkc1FYBpwOFmtgLYAKw2swlV2swCmgHPAuPdfXPmfA/gNXffUkM7EanJ9u0wdGiY67/ppvBLnb/Uk7l77Ax7VVpa6uXl5bFjiMS1dSsMGQJPPAG33RamfkT2wcyWuXtpTe20bEAkyT75JKzvnz07zPf/53/GTiRFRAVAJKk2b4bBg8NWzv/zP3DRRbETSZFRARBJoo0bw17+zz8PU6eG+X+RBqYCIJI0GzaEp3gtXx7u7j3nnNiJpEipAIgkydq10L8/vPEGPPZY2NxNJE9UAESS4v334ZRT4O23oawsHIvkkQqASBK8807Yx3/NmvAs3z59YieSFFABEIlt9Wo4+WT48MPwFK8T
ToidSFJCBUAkpr/8JXT+W7eG5/d+7WuxE0mKqACIxPLqq5Xz/AsWwJe+FDWOpI+2ERSJ4aWXoG/fsIf/woXq/CUKFQCRxrZ0abjg26oVLFoEnTvHTiQppQIg0pgWLQrr/Nu3D8f/8R+xE0mKqQCINJY5c+C00+Dznw/TPkceGTuRpJwKgEhjKCsLG7sdc0zo/D/3udiJRFQARPJu+nT49rfhy1+G+fPhs5+NnUgEUAEQya8HH4TvfAeOOw6eeQbatImdSORTKgAi+TJlCgwbBt/8ZnigS0lJ7EQie1ABEMmHSZNgxAgYMABmzoSWLWMnEqlGBUCkod12G1x2GZx5Jjz+OLRoETuRSFYqACIN6aab4Npr4bzz4JFH4MADYycS2SsVAJGG4A7jxsENN8AFF4SLvwccEDuVyD5pMziR+nKHa66BO+6Aiy+G3/wG9tPYSpJPf0tF6qOiAkaNCp3/5ZfDPfeo85eCob+pInW1c2dY6XP33TB6NEycCGaxU4nkTAVApC62bw9z/b/7Hdx4I/ziF+r8peDoGoBIbWzaBM8/D3fdBTNmhI5/zJjYqUTqRAVAZG8qKuCvf4Xnnqv8tXJluOjbpEmY8rniitgpRepMBUBkl02b4IUXKjv7pUvhX/8Krx18cHhY+3nnQc+eYW8fbe0gBU4FQNLJvfro/s9/DufNoGtXOOec0Nn37AmdOml1jxQdFQBJh48+qj6637AhvFZSEkb3uzr844/X6F5SQQVAio87rFpVfXRfURFe79o17M+/a3TfubNG95JKKgBS+DZvrj66/+c/w2slJWFEv6vDP/74MJ8vIioAUmDc4c039xzdv/pq5ei+Sxc466zK0X2XLhrdi+yFCoAk2+bN8OKLe47u168Pr33mM2FEf/31laP71q3j5hUpICoAkhzusHr1nqP7FSsqR/edO4cHq+8+um/SJG5mkQKmAiDxfPxx9dH9unXhtVatwoh+3LjK0b2epyvSoFQApHG4w9/+Vn10v3NneL1TJzjjjMrRfdeuGt2L5JkKgOTHJ59UH92vXRtea9kyjOjHjg2d/QknaHQvEoEKgNSfO7z11p6j++XLK0f3X/wiDBxYObrv1k2je5EEqLEAmFkz4BHgCGAFcKG7e5U2rYHHgAOAp9x9vJn1BW7KNDkKuD7zcx4E2gFL3H10A30OaUyffALl5Xt2+LuP7o87Dn7yk8rRfdu2cfOKSFa5fAMYBrzr7oPMrAzoDzxdpc1QYKW7jzKzp8ysg7svAHoDmNlM4GXgfGCpu99sZjPNrIu7v95gn0Yanju8/Xb10f2OHeH1Y46B006rHN1/6Usa3YsUiFwKQD9geuZ4HnAS1QuAAa3MzDLHxwJvAZhZC6Cju68ws87AUWbWBGgObKv/R5AG9e9/Vx/dr1kTXjvooDC6Hz26cnTfrl3cvCJSZ7kUgLbAxszxJqBTljZTgQGEQrGV0Lnv0h+Ymzl+DBhD+CYw091XV/1BZjYSGAlw5JFH5hBP6m3jRhg/HhYuhFdeqRzdd+wIAwbsObrfX5eNRIpFLv+a1wO7tkYsyfw+mxHuvs7MHgbW7nZ+MPBo5ngscLe732tmD5nZN9z9/3b/Ie4+GZgMUFpause1BsmDLVvC1gmLF8M3vwnXXVc5um/fPnY6EcmjXArAXCpH9/2AO7K06QMMN7OhhOmfpQCZKaG+wGWZdq2ALZnjrUDLugaXBrBzZ3iu7cKFMHUqnH9+7EQi0ohy2SVrGnC4ma0ANgCrzWxClTazgGbAs8B4d9+cOd8DeM3dd3X6k4BLzew5wjTRXCQOd7jqKnjkEZgwQZ2/SApZlRWdiVJaWurl5eWxYxSnW24JSzWvvhpuuy12GhFpQGa2zN1La2qnfXLT6L77Quf/ve/BL38ZO42IRKICkDazZsGIEXDKKfC732mvfJEU07/+NHnhBTj3XPjKV2D6dGjaNHYiEYlIBSAtVq0Ku20ecgg8+WR4mIqIpJoKQBp88AGc
emo4nj0bDj00bh4RSQTd1lnsNm2C008P2zksWBD27hERQQWguG3bBmefHR6aPmMG9OgRO5GIJIgKQLGqqIDvfx/mzg3LPk87LXYiEUkYXQMoVtddBw89BDffDBdeGDuNiCSQCkAxuu02uP12uPxyGDMmdhoRSSgVgGIzbRpcey2cdx7ccQeYxU4kIgmlAlBMnn46zPv37Qv3368nc4nIPqkAFIuXXoJzzoGuXeFPf4JmzWInEpGEUwEoBqtXw8CB0KZN2OunpKTm94hI6qkAFLq1a8MSzx07wl2+n/tc7EQiUiB0H0Ah27w57O/z3nthvX/nzrETiUgBUQEoVNu3h509X345zPn37Bk7kYgUGBWAQuQe9vSfPRvuvRcGDYqdSEQKkK4BFKKxY+GBB2D8+FAIRETqQAWg0EycGJ7ne+mlMG5c7DQiUsBUAArJH/4AV10Vdvi86y7d5Ssi9aICUCjmzQubuvXqBVOn6i5fEak3FYBC8Mor8K1vhYe5PPEENG8eO5GIFAEVgKR7++1wl29JSbjLt3Xr2IlEpEhoGWiSrV8fnuW7ZQssXgxHHBE7kYgUERWApPr447C+/513YM4c6NYtdiIRKTIqAEm0fTsMGQIvvgjTp0Pv3rETiUgRUgFIGne45BJ48km4++5w8VdEJA90EThpfvpTmDIl/O8ll8ROIyJFTAUgSX79a7jpJvjhD+HGG2OnEZEipwKQFI8+CpddBoMHh6kf3eUrInmmApAEixbB0KFwwgnw+9/D/ro0IyL5pwIQ26uvwplnQocOMGMGtGgRO5GIpIQKQEzvvBPu8j3oIHjqKWjbNnYiEUkRzTXEsmFDeJbvRx+Fu3yPOip2IhFJGRWAGP7973Cxd/Xq8FSvL385diIRSSEVgMa2Ywd897vw3HNhf/++fWMnEpGUUgFoTO4walTY0vnOO+G882InEpEU00XgxvRf/wWTJ4dn+l5+eew0IpJy+ywAZtbMzMrMbLmZPWBW/e4kM2ttZgvMbImZ3ZA519fMFmd+/cPMhmc7l68PlUiTJ4e7e4cPh5//PHYaEZEavwEMA9519+5Aa6B/ljZDgZXu3gvoZWYd3H2Bu/d2997ACuDlbOca8HMk2+OPh4e4DxwIv/2t7vIVkUSoqQD0A+ZkjucBJ2VpY0CrzLcDA4799AWzFkBHd1+xr3NFbcmScNH361+Hhx+GAw6InUhEBKi5ALQFNmaONwFtsrSZChwMTAe2Ars/sLY/MLdK+2znitPrr4flnkccATNnhhu+REQSoqYCsB4oyRyXZH6fzQh3P5tQANbudn4wUFalbbZznzKzkWZWbmbl69atqyFegr33Xnic44EHhrX+7dvHTiQisoeaCsBcYEDmuB8wP0ubPsBvzOxAwvTPUoDMlFBfwtQReztXlbtPdvdSdy9tX6id5ocfhrt8P/wwPNilQ4fYiUREqqmpAEwDDjezFcAGYLWZTajSZhbQDHgWGO/umzPnewCvufuW3dpmO1dctmyBs86CN96Axx6Dr341diIRkazM3WNn2KvS0lIvLy+PHSN3O3fCd74TnuP70EPh4q+ISCMzs2XuXlpTO90I1lDc4corQ+d/++3q/EUk8VQAGsrNN8OkSXDttXDVVbHTiIjUSAWgIfzv/8K4cXD++XDLLbHTiIjkRAWgvmbOhIsvhv79YcoU2E9/pCJSGNRb1cfzz4cdPbt3D3P/TZvGTiQikjMVgLp64w044ww47LCw1r9Vq9iJRERqRQWgLt5/P9zlu99+4S7fQw6JnUhEpNb0QJja2rgRTj8d1q+HBQugY8fYiURE6kQFoDa2boVvfxtWroSyMiit8T4LEZHEUgHIVUUFXHghzJ8P998fpoBERAqYrgHkwh2uvhr++Ee49Va44ILYiURE6k0FIBcTJsDEifDjH4c7fUVEioAKQE0eeABGjw6bvN12mx7nKCJFQwVgX2bPhosugpNOgvvu012+IlJU1KPtTXk5nHMOdOsW
9vU/8MDYiUREGpQKQDZvvhnW+rdrB7NmQUlJze8RESkwKgBVrVkTlnhWVIQpoMMOi51IRCQvdB/A7j76KIz8P/gA5s2DTp1iJxIRyRsVgF22bQtz/suXw+OPw/HHx04kIpJXKgAQpnsuugjmzAl7+p9xRuxEIiJ5p2sAAGPGwLRp8POfww9+EDuNiEijUAG4/fZwp++oUTB2bOw0IiKNJt0F4KGH4Jprwtz/xIm6y1dEUiW9BWDuXBg+HPr0galToUmT2IlERBpVOgvAyy+Hff07dQorfpo1i51IRKTRpa8AvPUWDBwIBx8MTz0V/ldEJIXStQx03bpwl++2beHBLocfHjuRiEg06SkAmzeH9f3/+Ac88wx06RI7kYhIVOkoANu3w5AhsGwZPPoo9OoVO5GISHTFXwDc4eKLw66e99wDZ50VO5GISCIU/0XgcePCw1xuvBFGjoydRkQkMYq7ANx1F9x8c+j4f/rT2GlERBKleAvAww/DlVeGKZ9Jk3SXr4hIFcVZABYsgGHDoGfPsN3D/sV/qUNEpLaKswC0bw99+8KMGdC8eew0IiKJVJxD427dwuMcRURkr4rzG4CIiNRIBUBEJKVUAEREUkoFQEQkpWosAGbWzMzKzGy5mT1gVn1BvZm1NrMFZrbEzG7InOtrZoszv/5hZsMz50eb2VIzm2VmTRv+I4mISC5y+QYwDHjX3bsDrYH+WdoMBVa6ey+gl5l1cPcF7t7b3XsDK4CXzewLQDd3PwGYBXy+YT6GiIjUVi4FoB8wJ3M8DzgpSxsDWmW+HRhw7KcvmLUAOrr7CuBkoLWZLQK+CbxVj+wiIlIPuRSAtsDGzPEmoE2WNlOBg4HpwFZg97uv+gNzM8ftgXXu3ocw+u9d9QeZ2UgzKzez8nXr1uX0IUREpPZyuRFsPVCSOS7J/D6bEe6+zsweBtbudn4w8GjmeBPwRub4b0C1R3K5+2RgMoCZrTOzv+eQMZt2+8iaFMpYf0nPB8nPmPR8kPyMSct3VC6NcikAc4EBhNF9P+COLG36AMPNbChh+mcpQGZKqC9wWabdMuCqzHFHQhHYK3dvn0O+rMys3N1L6/r+xqCM9Zf0fJD8jEnPB8nPmPR8e5PLFNA04HAzWwFsAFab2YQqbWYBzYBngfHuvjlzvgfwmrtvAXD354B/mtmLwBvu/kJDfAgREam9Gr8BuPtWYFCV09dWabMdOCPLe18Azqxy7tLaxxQRkYZWzDeCTY4dIAfKWH9JzwfJz5j0fJD8jEnPl5W5e+wMIiISQTF/AxARkX0oyAKQ4/YU1baiyOV9MfNlzt+X2SrjCTPL2/Ma6pMx89rVZvZMEvM11nYj9fh7eJCZPZ7ZOuXWfOWrRcZqecysnZk9a2avmtkvEpivmZnNMLMXzSyv0y91zbjba3ea2b35zFhXBVkAyGF7imxbUeTyvpj5zKw3sH9mq4zPEJbf5ktd/wwxs6OA4VXbJyGfNe52I3X9MzwfWJrZOqWbmXWJmXEveX4MzAS6AwPN7IsJy3cusMLdewD9zaxrnvLVJyNmdhxwWh6z1UuhFoBctqcAqm1FkfP7IuVbA0zMvJTv/2/qmhFCxrH5jVfnfI253UhdM34ItDSzJoS75rdFzpgtTz9gjrtXAAv38r6Y+V4HHsi83iRP2eqV0cwOAG4Brs9zvjor1AKQy/YUu+y+FUVt3lcfdcrn7qvc/QUz+zZQATydp3x1zmjhZr/lwGt5zAZ1//+4xu1GGlBdMz5GGBWuBl5399V5S5hbxmx5kvRvpVo+d1/m7n8xsx8Di909n38f6/pneB1wP3vujJAohVoAct2eAsJWFGV1eF991DUfZnYmcAUw2N135Ckf1D3jIMIo+/fA183ssr2+K06+GrcbaUB1zTgWuNvdjwbamNk38pYwt4zZ8iTp30rWPy8zu5TwLS/f05F1zXhaJtuvgDPM7Nw856y1Qi0Au7angPD1bH62RpmLNX0JX9tyfl+sfGZ2
KGHUMMjdP8pTtnpldPehmfns7wLL3P2/k5SPsN3Irlvya9xupJ7qmrEVsCVzvBVomb+IOWXMlmcuMMDM9gNO3Mv7ouUzs+6Em0+HZG5Ezac6ZXT3Pu7el8z1FHd/JM85a61QC0Au21NAla0osrxvbpb3xMw3HDgMmJ1ZNXJRnvLVJ2NjqVO+Rt5upK5/hpOAS83sOcJ8cb7+HuaaMVueO4HTCReuZ7r7mwnLdwlwNLAw829lYJ7y1Sdj4ulGMBGRlCrUbwAiIlJPKgAiIimlAiAiklIqACIiKaUCICKSUioAIiIppQIgIpJS/w8MUr4TArIQgAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 准确率可视化展示\n",
    "plt.plot(n_s, np.array(accuracy), \"r\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "经过图形展示,选择合理的n_components, 最后综合考虑确定结果为:0.80"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 确定最优模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "43"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pca = PCA(n_components=0.80)\n",
    "\n",
    "pca.fit(x_train)\n",
    "pca.n_components_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "x_train_pca = pca.transform(x_train)\n",
    "x_val_pca = pca.transform(x_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(33600, 43) (8400, 43)\n"
     ]
    }
   ],
   "source": [
    "print(x_train_pca.shape, x_val_pca.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.979047619047619"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 训练比较优的模型,计算accuracy\n",
    "\n",
    "ss1 = svm.SVC()\n",
    "\n",
    "ss1.fit(x_train_pca, y_train)\n",
    "\n",
    "ss1.score(x_val_pca, y_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
